####################################
# Bayesian Binary Two-Group Comparison
# Author: Michail Tsikerdekis
# Description: It compares two group binary rates and obtains their difference delta using MCMC. 
# It also tests the null hypothesis based on delta = 0.
####################################

# Clear Workspace
# Clearing with all.names = TRUE also removes hidden objects, including the
# RNG state (.Random.seed), so the seed has to be set AFTER this line.
rm(list = ls(all.names = TRUE))
# Seed R's random number generator so that the random draws made by R in this
# script are reproducible. set.seed() must be called as a function; assigning
# a value to the name `set.seed` would only create a variable and leave the
# generator unseeded.
set.seed(6192014)
#setwd("~/Dropbox/BayesChapter/Final Code/") # if source files are not found point to the directory using this command

####################################
# Book data preparation
####################################
# generate.R is expected to create the `email` data frame used below; the code
# here relies on it having a `team` column coded 0/1.
source("generate.R") # Generating book data

# Simulate a binary "used cell phone" indicator for every row of `email`.
# Rows with team == 0 get a 1 with probability .20, rows with team == 1 get a
# 1 with probability .80. Sampling is done with replacement, one draw per row
# of the respective team.
email$usedcellphone <- 0
email$usedcellphone[email$team == 0] <- sample(
  c(0, 1), sum(email$team == 0), replace = TRUE, prob = c(.80, .20)
)
email$usedcellphone[email$team == 1] <- sample(
  c(0, 1), sum(email$team == 1), replace = TRUE, prob = c(.20, .80)
)

####################################
# Data Input
# s1, s2: number of observations in each group that show the characteristic
#         of interest (here: usedcellphone == 1), e.g. 2 people in a group
#         have used the help button
# n1, n2: total number of observations in each group
# s1prior, f1prior, s2prior, f2prior: prior "successes + 1" and
#         "failures + 1" for each group; they are passed to the model as the
#         two parameters of the Beta prior on each rate. With both set to 1
#         the prior is uniform, i.e. equal probability across the board.
####################################
# Group 1 (team == 0)
s1 <- with(email, sum(usedcellphone[team == 0]))
n1 <- nrow(email[email$team == 0, ])

# Group 2 (team == 1)
s2 <- with(email, sum(usedcellphone[team == 1]))
n2 <- nrow(email[email$team == 1, ])

# Beta prior parameters for group 1 and group 2
s1prior <- 1
f1prior <- 1
s2prior <- 1
f2prior <- 1

# Model parameters
n.simu <- 50000                        # total number of iterations per chain
n.burnin <- n.simu / 2                 # first half is set aside before the kept samples
par <- c("theta1", "theta2", "delta")  # parameters to monitor

# Hypothesis testing parameters
# Probability mass of the interval handed to the ROPE test (e.g. .95 = 95%).
confint <- 0.95
# The null value. With respect to model testing, this is also related to the
# prior H0 distribution if Bayes Factor is used. For HDI in ROPE this can be
# virtually anything.
compval <- 0
# ROPE radius in case ROPE is used to test hypotheses
ropeRad <- 0.20

# Other options (switches for the optional parts of the script)
convergencetests <- TRUE  # draw convergence diagnostics and trace plots
plotting <- TRUE          # draw the posterior density plot
testbayes <- TRUE         # Bayes-factor test switch (not referenced in this part of the script)
testrope <- TRUE          # run the HDI + ROPE test

####################################
# Additional calculated variables
####################################
# Failures per group = group total minus successes
f1 <- n1 - s1
f2 <- n2 - s2

# Named list of observed counts, group sizes and prior parameters; this is the
# data object handed to the JAGS model.
D <- list(
  s1 = s1,
  s2 = s2,
  n1 = n1,
  n2 = n2,
  s1prior = s1prior,
  s2prior = s2prior,
  f1prior = f1prior,
  f2prior = f2prior
)

####################################
# Load or Install Required Packages
####################################
# Each package is listed once; the order is the order in which they are
# attached.
packages <- c(
  "R.utils", "R2jags", "MCMCpack", "coda", "R2OpenBUGS",
  "plyr", "ggplot2", "BEST"
)
for (package in packages) {
  # Install only when the package cannot be loaded from the current library
  if (!requireNamespace(package, quietly = TRUE)) {
    install.packages(package)
  }
  # library() raises an error if the package is still unavailable, so the
  # script stops here when an installation failed.
  library(package, character.only = TRUE)
}

####################################
# Setup JAGS model
####################################
# Model specification written as an R function so that it can be exported to
# a model file with write.model(). The body is BUGS/JAGS model syntax, not
# code that is meant to be run by R. All names other than theta1, theta2 and
# delta (s1, s2, n1, n2 and the four *prior values) are supplied through the
# data list given to the model.
jags.bin <- function() {
  # Beta priors for the rates of interest (uniform when both parameters are 1)
  theta1 ~ dbeta(s1prior,f1prior)
  theta2 ~ dbeta(s2prior,f2prior)
  
  # Binomial distribution for observed counts
  s1 ~ dbin(theta1,n1)
  s2 ~ dbin(theta2,n2)
  
  # Difference between Rates (group 1 minus group 2)
  delta <- theta1-theta2
}
# Write the model specification to "jags.txt" in the working directory
write.model(jags.bin, "jags.txt")

####################################
# Building model using MCMC sampling
####################################
# Compile the model written to "jags.txt" with four chains; n.burnin
# iterations are used for the adaptation phase.
m.jags <- jags.model(
  "jags.txt",
  data = D,
  n.chains = 4,
  n.adapt = n.burnin,
  quiet = TRUE
)

# First sampling run: monitor only the two rates, used for the Gelman plot.
s <- coda.samples(
  m.jags,
  variable.names = c("theta1", "theta2"),
  n.iter = n.simu - n.burnin,
  quiet = TRUE
)
if (convergencetests) {
  gelman.plot(s)
}

# Second sampling run: monitor every parameter in `par`. This replaces `s`,
# and these are the samples used in the rest of the script.
s <- coda.samples(
  m.jags,
  variable.names = par,
  n.iter = n.simu - n.burnin,
  quiet = TRUE
)
if (convergencetests) {
  plot(s)
}

# Turn the coda object into a plain data frame (via a matrix) so that the
# sampled values can be accessed by parameter name, e.g. df$theta1.
df <- as.data.frame(as.matrix(s))

####################################
# Plotting posterior results
####################################
if (plotting) {
  # Comparing posterior density plots of the two thetas.
  # Stack both sets of posterior samples in long format; `cond` records which
  # rate each sample belongs to. Both sets are posterior draws, so they are
  # labelled by parameter name.
  nrecords <- nrow(df)
  dfplot <- data.frame(
    thetas = c(df$theta1, df$theta2),
    cond = rep(c("theta1", "theta2"), each = nrecords)
  )

  # Posterior mean of each rate, drawn as a dashed vertical line
  cdf <- ddply(dfplot, "cond", summarise, theta.mean = mean(thetas))

  # print() is needed so the plot is drawn when the script is sourced
  print(
    ggplot(dfplot, aes(x = thetas, fill = cond)) +
      geom_density(alpha = .3) +
      theme_minimal() +
      geom_vline(
        data = cdf,
        aes(xintercept = theta.mean, colour = cond),
        linetype = "dashed",
        size = 1
      )
  )
}


####################################
# Means for Thetas and HDI intervals
####################################
# Posterior mean of each group's rate
meantheta1 <- mean(df$theta1)
meantheta2 <- mean(df$theta2)

# Highest density interval of each rate. The credible mass comes from the
# `confint` setting so that it agrees with the mass used in the ROPE test.
hditheta1 <- hdi(df$theta1, credMass = confint)
hditheta2 <- hdi(df$theta2, credMass = confint)

# Report mean and interval bounds (lower, upper), rounded to three decimals;
# the percentage in the label follows `confint`.
cat(paste0(
  "Group 1: M = ", round(meantheta1, 3),
  ", ", confint * 100, "% HDI [", round(hditheta1[[1]], 3),
  ", ", round(hditheta1[[2]], 3), "]\n"
))
cat(paste0(
  "Group 2: M = ", round(meantheta2, 3),
  ", ", confint * 100, "% HDI [", round(hditheta2[[1]], 3),
  ", ", round(hditheta2[[2]], 3), "]\n"
))

####################################
# Model Comparison
####################################
# Null-hypothesis test on delta using Kruschke's HDI and ROPE method
if (testrope) {
  # The test function lives in its own file and is loaded only when needed
  source("H0testUsingROPE.R")
  # Arguments: posterior samples of delta, null value, ROPE radius and the
  # interval mass configured above
  H0testUsingROPE(df$delta, compval, ropeRad, confint)
}
